home *** CD-ROM | disk | FTP | other *** search
Text File | 1997-07-30 | 49.2 KB | 1,750 lines |
- : # use perl -*- mode: Perl; -*-
- eval 'exec perl -S $0 "$@"'
- if $runnning_under_some_shell;
-
- #
- # weblint - pick fluff off WWW pages (html).
- #
- # Copyright (C) 1994, 1995, 1996 Neil Bowers. All rights reserved.
- #
- # See README for additional blurb.
- # Bugs, comments, suggestions welcome: neilb@cre.canon.co.uk
- #
- # Latest version is available as:
- # ftp://ftp.cre.canon.co.uk/pub/weblint/weblint.tar.gz
- #
-
- $VERSION = '1.017';
- ($PROGRAM = $0) =~ s@.*/@@;
- @TMPDIR_OPTIONS = ('/usr/tmp', '/tmp', '/var/tmp', '/temp');
- $TMPDIR = &PickTmpdir(@TMPDIR_OPTIONS);
- $SITE_DIR = '';
- $USER_RCFILE = $ENV{'WEBLINTRC'} || "$ENV{'HOME'}/.weblintrc";
- $SITE_RCFILE = $SITE_DIR.'/global.weblintrc' if $SITE_DIR;
-
-
- #------------------------------------------------------------------------
- # $version - the string which is displayed with -v or -version
- #------------------------------------------------------------------------
- $versionString=<<EofVersion;
- This is weblint, version $VERSION
-
- Copyright 1994,1995,1996 Neil Bowers
-
- Weblint may be used and copied only under the terms of the Artistic
- License, which may be found in the Weblint source kit, or at:
- http://www.cre.canon.co.uk/~neilb/weblint/artistic.html
- EofVersion
-
-
- #------------------------------------------------------------------------
- # $usage - usage string displayed with the -U command-line switch
- #------------------------------------------------------------------------
- $usage=<<EofUsage;
- $PROGRAM v$VERSION - pick fluff off web pages (HTML)
- -d : disable specified warnings (warnings separated by commas)
- -e : enable specified warnings (warnings separated by commas)
- -f filename : alternate configuration file
- -stderr : print warnings to STDERR rather than STDOUT
- -i : ignore case in element tags
- -l : ignore symlinks when recursing in a directory
- -pedantic : turn on all warnings, except for case of element tags
- -s : give short warning messages (filename not printed)
- -t : terse warning mode, useful mainly for testsuite
- -todo : print the todo list for $PROGRAM
- -help | -U : display this usage message
- -urlget : specify the command used to get a URL
- -version | -v : display version
- -warnings : list supported warnings
- -x <extn> : HTML extension to include (supported: Java, Netscape)
-
- To check one or more HTML files, run weblint thusly:
- weblint file1.html [... fileN.html]
- If a file is in fact a directory, weblint will recurse, checking all files.
-
- To include the Netscape extensions: weblint -x Netscape file.html
- EofUsage
-
- #------------------------------------------------------------------------
- # $todo - string displayed with the -todo switch
- #------------------------------------------------------------------------
- $todo=<<EofToDo;
- The Weblint toDo list can be seen at:
- http://www.cre.canon.co.uk/~neilb/weblint/todo/
- EofToDo
-
- *WARNING = *STDOUT;
-
- # obsolete tags
- $obsoleteTags = 'PLAINTEXT|XMP|LISTING|COMMENT';
-
- $maybePaired = 'LI|DT|DD|P|ROW|TD|TH|TR';
-
- $pairElements = 'A|ABBREV|ABOVE|ACRONYM|ADDRESS|ARRAY|AU|'.
- 'HTML|HEAD|BANNER|BAR|BELOW|BIG|BLOCKQUOTE|BODY|BOX|BQ|BT|'.
- 'CAPTION|CREDIT|DDOT|DEL|DIV|DOT|'.
- 'FIG|FN|H1|H2|H3|H4|H5|H6|HAT|INS|LH|OVERLAY|'.
- 'B|I|U|TT|STRONG|EM|CODE|KBD|VAR|DFN|CITE|SAMP|Q|LANG|'.
- 'UL|OL|DL|'.
- 'MATH|MENU|DIR|FORM|NOTE|PERSON|ROOT|'.
- 'S|SELECT|SMALL|SQRT|STRIKE|STYLE|'.
- 'SUB|SUP|T|TABLE|TEXT|TEXTAREA|TILDE|TITLE|VEC|CODE|PRE|'.
- $maybePaired.'|'.
- $obsoleteTags;
-
- # container elements which shouldn't have leading or trailing whitespace
- $cuddleContainers = 'A|H1|H2|H3|H4|H5|H6|TITLE|LI';
-
- # expect to see these tags only once
- %onceOnly = ('HTML', 1, 'HEAD', 1, 'BODY', 1, 'TITLE', 1);
-
- @fontElements = ('EM', 'CITE', 'STRONG', 'CODE', 'SAMP', 'KBD', 'VAR',
- 'DFN', 'Q', 'LANG', 'AU', 'PERSON', 'ACRONYM', 'ABBREV',
- 'INS', 'DEL',
- 'B', 'I', 'TT', 'U', 'S', 'BIG', 'SMALL', 'SUB', 'SUP');
-
- %physicalFontElements =
- (
- 'B', 'STRONG',
- 'I', 'EM',
- 'TT', 'CODE, SAMP, KBD, or VAR'
- );
-
- # expect these tags to have attributes
- # these are elements which have no required attributes, but we expect to
- # see at least one of the attributes
- $expectArgsRE = 'A';
-
- # these tags can only appear in the head element
- $headTagsRE = 'TITLE|NEXTID|LINK|BASE|META';
-
- %requiredContext =
- (
- 'ABOVE', 'MATH',
- 'ARRAY', 'MATH',
- 'ATOP', 'BOX',
- 'BAR', 'MATH',
- 'BELOW', 'MATH',
- 'BOX', 'MATH',
- 'BT', 'MATH',
- 'CAPTION', 'TABLE|FIG',
- 'CHOOSE', 'BOX',
- 'DD', 'DL',
- 'DDOT', 'MATH',
- 'DOT', 'MATH',
- 'DT', 'DL',
- 'HAT', 'MATH',
- 'INPUT', 'FORM',
- 'ITEM', 'ROW',
- 'LEFT', 'BOX',
- 'LH', 'DL|OL|UL',
- 'LI', 'DIR|MENU|OL|UL',
- 'OF', 'ROOT',
- 'OPTION', 'SELECT',
- 'OVER', 'BOX',
- 'OVERLAY', 'FIG',
- 'RIGHT', 'BOX',
- 'ROOT', 'MATH',
- 'ROW', 'ARRAY',
- 'SELECT', 'FORM',
- 'SQRT', 'MATH',
- 'T', 'MATH',
- 'TD', 'TR',
- 'TEXT', 'MATH',
- 'TEXTAREA', 'FORM',
- 'TH', 'TR',
- 'TILDE', 'MATH',
- 'TR', 'TABLE',
- 'VEC', 'MATH'
- );
-
- # these tags are allowed to appear in the head element
- %okInHead = ('ISINDEX', 1, 'TITLE', 1, 'NEXTID', 1, 'LINK', 1,
- 'BASE', 1, 'META', 1, 'RANGE', 1, 'STYLE', 1, '!--', 1);
-
- # expect to see these at least once.
- # html-outer covers the HTML element
- @expectedTags = ('HEAD', 'TITLE', 'BODY');
-
- # elements which cannot be nested
- $nonNest = 'A|FORM';
-
- $netscapeElements = 'NOBR|WBR|FONT|FRAME|FRAMESET|NOFRAMES|BASEFONT|BLINK|'.
- 'CENTER|MAP|AREA|SCRIPT';
- $javaElements = 'APPLET|PARAM';
-
- #
- # This is a regular expression for all legal elements
- # UPDATE: need to remove duplication in legalElements and pairElements
- #
- $legalElements =
- 'A|ABBREV|ABOVE|ACRONYM|ADDRESS|ARRAY|ATOP|AU|'.
- 'B|BANNER|BAR|BASE|BELOW|BIG|BLOCKQUOTE|BODY|BOX|BQ|BR|BT|'.
- 'CAPTION|CHOOSE|CITE|CODE|CREDIT|'.
- 'DD|DDOT|DFN|DEL|DIR|DIV|DL|DOT|DT|'.
- 'EM|FIG|FN|FORM|H1|H2|H3|H4|H5|H6|HAT|HEAD|HR|HTML|'.
- 'I|IMG|INPUT|INS|ISINDEX|ITEM|KBD|'.
- 'LANG|LEFT|LH|LI|LINK|MATH|MENU|META|NEXTID|NOTE|'.
- 'OF|OL|OPTION|OVER|OVERLAY|P|PERSON|PRE|Q|RANGE|RIGHT|ROOT|ROW|'.
- 'SAMP|SELECT|S|SMALL|SQRT|STRIKE|STRONG|STYLE|SUB|SUP|'.
- 'T|TAB|TABLE|TD|TEXT|TEXTAREA|TH|TILDE|TITLE|TR|TT|U|UL|VAR|VEC|'.
- $obsoleteTags;
-
- # This table holds the valid attributes for elements
- # Where an element does not have an entry, this implies that the element
- # does not take any attributes
- %validAttributes =
- (
- 'A', 'ID|LANG|CLASS|HREF|MD|NAME|SHAPE|TITLE|REL|REV',
- 'ABOVE', 'SYM',
- 'ADDRESS', 'ID|LANG|CLASS|CLEAR|NOWRAP',
- 'ARRAY', 'ALIGN|COLDEF|LDELIM|RDELIM|LABELS',
- 'BANNER', 'ID|LANG|CLASS',
- 'BASE', 'HREF',
- 'BR', 'ID|LANG|CLASS|CLEAR',
- 'BLOCKQUOTE', 'ID|LANG|CLASS|CLEAR|NOWRAP',
- 'BODY', 'ID|LANG|CLASS|BACKGROUND',
- 'BOX', 'SIZE',
- 'BQ', 'ID|LANG|CLASS|CLEAR|NOWRAP',
- 'BELOW', 'SYM',
- 'CAPTION', 'ID|LANG|CLASS|ALIGN',
- 'CREDIT', 'ID|LANG|CLASS',
- 'DD', 'ID|LANG|CLASS|CLEAR',
- 'DIV', 'ID|LANG|CLASS|ALIGN|NOWRAP|CLEAR',
- 'DL', 'ID|LANG|CLASS|CLEAR|COMPACT',
- 'DT', 'ID|LANG|CLASS|CLEAR',
- 'FIG', 'ID|LANG|CLASS|CLEAR|NOFLOW|SRC|MD|ALIGN|WIDTH|HEIGHT|'.
- 'UNITS|IMAGEMAP',
- 'FN', 'ID|LANG|CLASS',
- 'FORM', 'ACTION|METHOD|ENCTYPE|SCRIPT',
- 'H1', 'ID|LANG|CLASS|ALIGN|CLEAR|SEQNUM|SKIP|DINGBAT|SRC|MD|NOWRAP',
- 'H2', 'ID|LANG|CLASS|ALIGN|CLEAR|SEQNUM|SKIP|DINGBAT|SRC|MD|NOWRAP',
- 'H3', 'ID|LANG|CLASS|ALIGN|CLEAR|SEQNUM|SKIP|DINGBAT|SRC|MD|NOWRAP',
- 'H4', 'ID|LANG|CLASS|ALIGN|CLEAR|SEQNUM|SKIP|DINGBAT|SRC|MD|NOWRAP',
- 'H5', 'ID|LANG|CLASS|ALIGN|CLEAR|SEQNUM|SKIP|DINGBAT|SRC|MD|NOWRAP',
- 'H6', 'ID|LANG|CLASS|ALIGN|CLEAR|SEQNUM|SKIP|DINGBAT|SRC|MD|NOWRAP',
- 'HR', 'ID|CLASS|CLEAR|SRC|MD',
- 'HTML', 'VERSION|URN|ROLE',
- 'IMG', 'ID|LANG|CLASS|SRC|MD|WIDTH|HEIGHT|UNITS|ALIGN|ALT|ISMAP',
- 'INPUT', 'ID|LANG|CLASS|TYPE|NAME|VALUE|DISABLED|ERROR|CHECKED|SIZE|'.
- 'MAXLENGTH|MIN|MAX|ACCEPT|SRC|MD|ALIGN',
- 'ITEM', 'ALIGN|COLSPAN|ROWSPAN',
- 'LH', 'ID|LANG|CLASS',
- 'LI', 'ID|LANG|CLASS|CLEAR|SRC|MD|DINGBAT|SKIP',
- 'LINK', 'HREF|REL|REV|URN|TITLE|METHODS',
- 'MATH', 'ID|CLASS|BOX',
- 'META', 'HTTP-EQUIV|NAME|CONTENT',
- 'NEXTID', 'N',
- 'NOTE', 'ID|LANG|CLASS|CLEAR|SRC|MD',
- 'OL', 'ID|LANG|CLASS|CLEAR|CONTINUE|SEQNUM|COMPACT',
- 'OPTION', 'ID|LANG|CLASS|DISABLED|ERROR|VALUE|SELECTED|SHAPE',
- 'OVERLAY', 'SRC|MD|UNITS|X|Y|WIDTH|HEIGHT',
- 'P', 'ID|LANG|CLASS|ALIGN|CLEAR|NOWRAP',
- 'PRE', 'ID|LANG|CLASS|CLEAR|WIDTH',
- 'RANGE', 'ID|CLASS|FROM|UNTIL',
- 'ROW', 'ALIGN|COLSPAN|ROWSPAN',
- 'SELECT', 'ID|LANG|CLASS|NAME|MULTIPLE|DISABLED|ERROR|SRC|MD|WIDTH|'.
- 'HEIGHT|UNITS|ALIGN|SIZE',
- 'STYLE', 'NOTATION',
- 'TAB', 'ID|INDENT|TO|ALIGN|DP',
- 'TABLE', 'ID|LANG|CLASS|CLEAR|NOFLOW|ALIGN|UNITS|COLSPEC|DP|WIDTH|'.
- 'BORDER|NOWRAP',
- 'TD', 'ID|LANG|CLASS|COLSPAN|ROWSPAN|ALIGN|DP|VALIGN|NOWRAP|'.
- 'AXIS|AXES',
- 'TEXTAREA', 'ID|LANG|CLASS|NAME|ROWS|COLS|DISABLED|ERROR|ALIGN',
- 'TH', 'ID|LANG|CLASS|COLSPAN|ROWSPAN|ALIGN|DP|VALIGN|NOWRAP|'.
- 'AXIS|AXES',
- 'TR', 'ID|LANG|CLASS|ALIGN|DP|VALIGN|NOWRAP',
- 'UL', 'ID|LANG|CLASS|CLEAR|PLAIN|SRC|MD|DINGBAT|WRAP|COMPACT',
- );
-
- foreach $elt (@fontElements)
- {
- $validAttributes{$elt} = 'ID|LANG|CLASS';
- }
-
- %requiredAttributes =
- (
- 'BASE', 'HREF',
- 'FORM', 'ACTION',
- 'IMG', 'SRC',
- 'LINK', 'HREF',
- 'NEXTID', 'N',
- 'SELECT', 'NAME',
- 'STYLE', 'NOTATION',
- 'TEXTAREA', 'NAME|ROWS|COLS'
- );
-
- $colorRE = '#[0-9a-fA-F]{6}';
- %attributeFormat =
- (
- 'ALIGN', 'BOTTOM|MIDDLE|TOP|LEFT|CENTER|RIGHT|JUSTIFY|'.
- 'BLEEDLEFT|BLEEDRIGHT|DECIMAL',
- 'BGCOLOR', $colorRE,
- 'CLEAR', 'LEFT|RIGHT|ALL',
- 'COLS', '\d+',
- 'COLSPAN', '\d+',
- 'HEIGHT', '\d+',
- 'INDENT', '\d+',
- 'MAXLENGTH', '\d+',
- 'METHOD', 'GET|POST',
- 'ROWS', '\d+',
- 'ROWSPAN', '\d+',
- 'SEQNUM', '\d+',
- 'SIZE', '\d+|\d+,\d+',
- 'SKIP', '\d+',
- 'TYPE', 'CHECKBOX|HIDDEN|IMAGE|PASSWORD|RADIO|RESET|SUBMIT|TEXT|',
- 'UNITS', 'PIXELS|EN',
- 'VALIGN', 'TOP|MIDDLE|BOTTOM|BASELINE',
- 'WIDTH', '\d+',
- 'WRAP', 'OFF|VIRTUAL|PHYSICAL',
- 'X', '\d+',
- 'Y', '\d+'
- );
-
- %netscapeAttributes =
- (
- 'A', 'TARGET',
- 'AREA', 'SHAPE|HREF|COORDS|NOHREF|TARGET',
- 'BASE', 'TARGET',
- 'BASEFONT', 'SIZE',
- 'BODY', 'BGCOLOR|TEXT|LINK|VLINK|ALINK',
- 'FONT', 'COLOR|SIZE',
- 'FORM', 'ENCTYPE|TARGET',
- 'FRAME', 'SRC|NAME|MARGINWIDTH|MARGINHEIGHT|SCROLLING|NORESIZE',
- 'FRAMESET', 'ROWS|COLS',
- 'HR', 'SIZE|WIDTH|ALIGN|NOSHADE',
- 'IMG', 'BORDER|VSPACE|HSPACE|LOWSRC|USEMAP',
- 'ISINDEX', 'PROMPT',
- 'LI', 'TYPE|VALUE',
- 'MAP', 'NAME',
- 'OL', 'TYPE|START',
- 'SCRIPT', 'LANGUAGE',
- 'TABLE', 'CELLSPACING|CELLPADDING',
- 'TEXTAREA', 'WRAP',
- 'TD', 'WIDTH|HEIGHT',
- 'TH', 'WIDTH|HEIGHT',
- 'TR', 'HEIGHT',
- 'UL', 'TYPE'
- );
-
- $msElements = 'AREA|BASEFONT|BGSOUND|CENTER|FONT|MAP|MARQUEE|NOBR|WBR';
- %msAttributes =
- (
- 'AREA', 'SHAPE|HREF|COORDS|NOHREF',
- 'BASEFONT', 'SIZE',
- 'BGSOUND', 'SRC|LOOP',
- 'BODY', 'ALINK|BGCOLOR|BGPROPERTIES|LEFTMARGIN|LINK|TEXT|TOPMARGIN|VLINK',
- 'CAPTION', 'VALIGN',
- 'FONT', 'COLOR|FACE|SIZE',
- 'HR', 'ALIGN|NOSHADE|SIZE|WIDTH',
- 'IMG', 'BORDER|CONTROLS|DYNSRC|HSPACE|LOOP|START|USEMAP|VSPACE',
- 'ISINDEX', 'ACTION|PROMPT',
- 'LI', 'TYPE|VALUE',
- 'MAP', 'NAME',
- 'MARQUEE', 'ALIGN|BEHAVIOR|BGCOLOR|DIRECTION|HEIGHT|HSPACE|LOOP|'.
- 'SCROLLAMOUNT|SCROLLDELAY|VSPACE|WIDTH',
- 'OL', 'START|TYPE',
- 'TABLE', 'BGCOLOR|BORDERCOLOR|BORDERCOLORLIGHT|BORDERCOLORDARK|'.
- 'CELLSPACING|CELLPADDING|VALIGN',
- 'TD', 'BGCOLOR|BORDERCOLOR|BORDERCOLORLIGHT|BORDERCOLORDARK|VALIGN|'.
- 'HEIGHT',
- 'TH', 'BGCOLOR|BORDERCOLOR|BORDERCOLORLIGHT|BORDERCOLORDARK|VALIGN|'.
- 'HEIGHT',
- 'TR', 'BGCOLOR|BORDERCOLOR|BORDERCOLORLIGHT|BORDERCOLORDARK|VALIGN|'.
- 'HEIGHT',
- );
- $msColors = 'Black|White|Green|Maroon|Olive|Navy|Purple|Gray|'.
- 'Red|Yellow|Blue|Teal|Lime|Aqua|Fuchsia|Silver';
-
- %mustFollow =
- (
- 'LH', 'UL|OL|DL',
- 'OVERLAY', 'FIG',
- 'HEAD', 'HTML',
- 'BODY', '/HEAD',
- 'FRAMESET', '/HEAD|/FRAME|/FRAMESET|/NOFRAMES',
- '/HTML', '/BODY|/FRAMESET',
- );
-
- %badTextContext =
- (
- 'HEAD', 'BODY, or TITLE perhaps',
- 'UL', 'LI or LH',
- 'OL', 'LI or LH',
- 'DL', 'DT or DD',
- 'TABLE', 'TD or TH',
- 'TR', 'TD or TH'
- );
-
- %variable =
- (
- 'directory-index', 'index.html',
- 'file-extensions', 'html, htm',
- 'url-get', '',
- 'message-style', 'lint'
- );
-
- @options = ('d=s', 'e=s', 'f=s', 'stderr', 'help', 'i', 'l', 's', 't',
- 'todo', 'U',
- 'noglobals', 'pedantic', 'urlget=s', 'v', 'version', 'warnings',
- 'x=s');
-
- $exit_status = 0;
-
- require 'newgetopt.pl';
- require 'find.pl';
-
- die "$usage" unless @ARGV > 0;
-
- # escape the `-' command-line switch (for stdin), so NGetOpt don't mess wi' it
- grep(s/^-$/\tstdin\t/, @ARGV);
-
- &NGetOpt(@options) || die "use -U switch to display usage statement\n";
-
- # put back the `-' command-line switch, if it was there
- grep(s/^\tstdin\t$/-/, @ARGV);
-
- die "$versionString\n" if $opt_v || $opt_version;
- die "$usage" if $opt_u || $opt_help;
-
- &ReadDefaults();
-
- # Read configuration
- if ($opt_f)
- {
- &ReadConfigFile($opt_f);
- }
- elsif (-f $USER_RCFILE)
- {
- &ReadConfigFile($USER_RCFILE);
- }
- elsif (! $opt_noglobals && -f $SITE_RCFILE)
- {
- &ReadConfigFile($SITE_RCFILE);
- }
-
- # must do this after reading their config file to see a valid url-get
- &PrintToDo() if $opt_todo;
-
- # pedantic command-line switch turns on all warnings except case checking
- if ($opt_pedantic)
- {
- foreach $warning (keys %enabled)
- {
- &enableWarning($warning, 1);
- }
- &enableWarning('lower-case', 0);
- &enableWarning('upper-case', 0);
- &enableWarning('bad-link', 0);
- &enableWarning('require-doctype', 0);
- }
-
- &AddExtension("\L$opt_x") if $opt_x;
- $variable{'message-style'} = 'short' if $opt_s;
- $variable{'message-style'} = 'terse' if $opt_t;
- $variable{'url-get'} = $opt_urlget if $opt_urlget;
- *WARNING = *STDERR if $opt_stderr;
- &ListWarnings() if $opt_warnings;
-
- ($fileExtensions = $variable{'file-extensions'}) =~ s/,\s*/\|/g;
-
- # WARNING file handle is default
- select(WARNING);
-
- $opt_l = 1 if $ignore{'SYMLINKS'};
-
- # -d to disable warnings
- if ($opt_d)
- {
- for (split(/,/,$opt_d))
- {
- &enableWarning($_, 0);
- }
- }
-
- # -e to enable warnings
- if ($opt_e)
- {
- for (split(/,/,$opt_e))
- {
- &enableWarning($_, 1) || next;
- }
- }
-
- # -i option to ignore case in element tags
- if ($opt_i)
- {
- $enabled{'lower-case'} = $enabled{'upper-case'} = 0;
- }
-
- if (defined $variable{'directory-index'})
- {
- @dirIndices = split(/\s*,\s*/, $variable{'directory-index'});
- }
-
- $argc = int(@ARGV);
- while (@ARGV > 0)
- {
- $arg = shift(@ARGV);
-
- &CheckURL($arg), next if $arg =~ m!^(http|gopher|ftp)://!;
-
- &find($arg), next if -d $arg;
-
- if ($opt_l && -l $arg && $argc == 1)
- {
- warn "$PROGRAM: $arg is a symlink, but I'll check it anyway\n";
- }
-
- &WebLint($arg), next if (-f $arg && -r $arg) || $arg eq '-';
-
- print "$PROGRAM: could not read $arg: $!\n";
- }
-
- exit $exit_status;
-
- #========================================================================
- # Function: WebLint
- # Purpose: This is the high-level interface to the checker. It takes
- # a file and checks for fluff.
- #========================================================================
- sub WebLint
- {
- local($filename,$relpath) = @_;
- local(@tags) = ();
- local($tagRE) = ('');
- local(@taglines) = ();
- local(@orphans) = ();
- local(@orphanlines) = ();
- local(%seenPage);
- local(%seenTag);
- local(%whined);
- local(*PAGE);
- local($line) = ('');
- local($id, $ID);
- local($tag, $tagNum);
- local($closing);
- local($tail);
- local(%args);
- local($arg);
- local($rest);
- local($lastNonTag);
- local(@notSeen);
- local($seenMailtoLink) = (0);
- local($matched);
- local($matchedLine);
- local($novalue);
- local($heading);
- local($headingLine);
- local($commentline);
- local($_);
-
-
- if ($filename eq '-')
- {
- *PAGE = *STDIN;
- $filename = 'stdin';
- }
- else
- {
- return if defined $seenPage{$filename};
- if (-d $filename)
- {
- print "$PROGRAM: $filename is a directory.\n";
- $exit_status = 0;
- return;
- }
- $seenPage{$filename}++;
- open(PAGE,"<$filename") || do
- {
- print "$PROGRAM: could not read file $filename: $!\n";
- $exit_status = 0;
- return;
- };
- $filename = $relpath if defined $relpath;
- }
-
- undef $heading;
- $tagNum = 0;
-
- READLINE:
- while (<PAGE>)
- {
- $line .= $_;
- # $line =~ s/\n/ /g;
-
- while ($line =~ /</o)
- {
- $tail = $'; #'
- undef $lastNonTag;
- if ($` !~ /^\s*$/o)
- {
- $lastNonTag = $`;
-
- # check for illegal text context
- if (defined $badTextContext{$tags[$#tags]})
- {
- &whine($., 'bad-text-context',$tags[$#tags],
- $badTextContext{$tags[$#tags]});
- }
-
- $lnt = $lastNonTag;
- while ($lnt =~ />/o)
- {
- $nl = $lnt = $';
- $nl =~ s/[^\n]//go;
- if ('PRE' =~ /^($tagRE)$/)
- {
- &whine($. - length($nl), 'meta-in-pre', '>', '>');
- }
- else
- {
- &whine($. - length($nl), 'literal-metacharacter', '>', '>');
- }
- }
- }
-
- #--------------------------------------------------------
- #== SGML comment: <!-- ... blah blah ... -->
- #--------------------------------------------------------
- if ($tail =~ /^!--/o)
- {
-
- $commentline = $. unless defined $commentline;
-
- # push lastNonTag onto word list for spell checking
-
- $ct = $';
- next READLINE unless $ct =~ /--\s*>/o;
-
- undef $commentline;
-
- $comment = $`;
- $line = $';
-
- # markup embedded in comment can confuse some (most? :-) browsers
- &whine($., 'markup-in-comment') if $comment =~ /<\s*[^>]+>/o;
- next;
- }
- undef $commentline;
-
- next READLINE unless $tail =~ /^(\s*)([^>]*)>/o;
-
-
- &whine($., 'leading-whitespace', $2) if $1 ne '';
-
- $line = $';
- ($tag = $2) =~ s/\n/ /go;
- $id = $tag;
-
- &whine($., 'unknown-element', $id),next if $id =~ /^\s*$/o;
-
- # push lastNonTag onto word list for spell checking
-
- undef $tail;
- undef $closing;
-
- #-- <!DOCTYPE ... > is ignored for now.
- $seenTag{'DOCTYPE'}=1,next if $id =~ /^!doctype/io;
-
- if (!$whined{'require-doctype'} && !$seenTag{'DOCTYPE'})
- {
- &whine($., 'require-doctype');
- $whined{'require-doctype'} = 1;
- }
-
- $closing = 0;
- if ($id =~ m@^/@o)
- {
- $id =~ s@^/@@o;
- $ID = "\U$id";
- $closing = 1;
- }
-
- &CheckAttributes();
-
- $TAG = ($closing ? '/' : '').$ID;
- if (defined $mustFollow{$TAG})
- {
- $ok = 0;
- foreach $pre (split(/\|/, $mustFollow{$TAG}))
- {
- ($ok=1),last if $pre eq $lastTAG;
- }
- if (!$ok || $lastNonTag !~ /^\s*$/o)
- {
- &whine($., 'must-follow', $TAG, $mustFollow{$TAG});
- }
- }
-
- #-- catch empty container elements
- if ($closing && $ID eq $lastTAG && $lastNonTag =~ /^\s*$/o
- && $tagNums[$#tagNums] == ($tagNum - 1)
- && $ID ne 'TEXTAREA' && $ID ne 'TD')
- {
- &whine($., 'empty-container', $ID);
- }
-
- #-- special case for empty optional container elements
- if (!$closing && $ID eq $tags[$#tags] && $lastTAG eq $ID
- && $ID =~ /^($maybePaired)$/
- && $tagNums[$#tagNums] == ($tagNum - 1)
- && $lastNonTag =~ /^\s*$/o)
- {
- $t = pop @tags;
- $tline = pop @taglines;
- pop @tagNums;
- &whine($tline, 'empty-container', $ID);
- $tagRE = join('|',@tags);
- }
-
- #-- whine about unrecognized element, and do no more checks ----
- if ($id !~ /^($legalElements)$/io)
- {
- if ($id =~ /^($netscapeElements|$javaElement|$msElement)$/io)
- {
- &whine($., 'extension-markup', ($closing ? "/$id" : "$id"));
- }
- else
- {
- &whine($., 'unknown-element', ($closing ? "/$id" : "$id"));
- }
- next;
- }
-
- if ($closing == 0 && defined $requiredAttributes{$ID})
- {
- @argkeys = keys %args;
- foreach $attr (split(/\|/,$requiredAttributes{$ID}))
- {
- unless (defined $args{$attr})
- {
- &whine($., 'required-attribute', $attr, $id);
- }
- }
- }
- elsif ($closing == 0 && $id =~ /^($expectArgsRE)$/io)
- {
- &whine($., 'expected-attribute', $id) unless defined %args;
- }
-
- #--------------------------------------------------------
- #== check case of tags
- #--------------------------------------------------------
- &whine($., 'upper-case', $id) if $id ne $ID;
- &whine($., 'lower-case', $id) if $id ne "\L$id";
-
-
- #--------------------------------------------------------
- #== if tag id is /foo, then strip slash, and mark as a closer
- #--------------------------------------------------------
- if ($closing)
- {
- if ($ID !~ /^($pairElements)$/o)
- {
- &whine($., 'illegal-closing', $id);
- }
-
- if ($ID eq 'A' && $lastNonTag =~ /^\s*here\s*$/io)
- {
- &whine($., 'here-anchor');
- }
-
- if ($ID eq 'TITLE' && length($lastNonTag) > 64)
- {
- &whine($., 'title-length');
- }
-
- #-- end of HEAD, did we see a TITLE in the HEAD element? ----
- &whine($., 'require-head') if $ID eq 'HEAD' && !$seenTag{'TITLE'};
-
- #-- was there a <LINK REV=MADE HREF="mailto:.."> element in HEAD?
- &whine($., 'mailto-link') if $ID eq 'HEAD' && $seenMailtoLink == 0;
- }
- else
- {
- #--------------------------------------------------------
- # do context checks. Should really be a state machine.
- #--------------------------------------------------------
-
- if (defined $physicalFontElements{$ID})
- {
- &whine($., 'physical-font', $ID, $physicalFontElements{$ID});
- }
-
- if ($ID =~ /^H[1-6]$/o && 'A' =~ /^($tagRE)$/)
- {
- &whine($., 'heading-in-anchor', $ID);
- }
-
- if ($ID eq 'A' && defined $args{'HREF'})
- {
- $target = $args{'HREF'};
- if ($target =~ /([^:]+):\/\/([^\/]+)(.*)$/o
- || $target =~ /^(news|mailto):/o
- || $target =~ /^\//o)
- {
- }
- else
- {
- $target =~ s/#.*$//o;
- if ($target !~ /^\s*$/o && ! -f $target && ! -d $target)
- {
- &whine($., 'bad-link', $target);
- }
- }
- }
-
- if ($ID =~ /^H(\d)$/o)
- {
- if (defined $heading && $1 - $heading > 1)
- {
- &whine($., 'heading-order', $ID, $heading, $headingLine);
- }
- $heading = $1;
- $headingLine = $.;
- }
-
- #-- check for mailto: LINK ------------------------------
- if ($ID eq 'LINK' && $args{'REV'} =~ /^made$/io
- && $args{'HREF'} =~ /^mailto:/io)
- {
- $seenMailtoLink = 1;
- }
-
- if (defined $onceOnly{$ID})
- {
- &whine($., 'once-only', $ID, $seenTag{$ID}) if $seenTag{$ID};
- }
- $seenTag{$ID} = $.;
-
- &whine($., 'body-no-head') if $ID eq 'BODY' && !$seenTag{'HEAD'};
-
- if ($ID ne 'HTML' && $ID ne '!DOCTYPE' && !$seenTag{'HTML'}
- && !$whined{'outer-html'})
- {
- &whine($., 'html-outer');
- $whined{'outer-html'} = 1;
- }
-
- #-- check for illegally nested elements ---------------------
- if ($ID =~ /^($nonNest)$/o && $ID =~ /^($tagRE)$/)
- {
- for ($i=$#tags; $tags[$i] ne $ID; --$i)
- {
- }
- &whine($., 'nested-element', $ID, $taglines[$i]);
- }
-
- &whine($., 'unknown-element', $ID) unless $ID =~ /^($legalElements)$/o;
-
- #--------------------------------------------------------
- # check for tags which have a required context
- #--------------------------------------------------------
- if (defined ($reqCon = $requiredContext{$ID}))
- {
- $ok = 0;
- foreach $context (split(/\|/, $requiredContext{$ID}))
- {
- ($ok=1),last if $context =~ /^($tagRE)$/;
- }
- unless ($ok)
- {
- &whine($., 'required-context', $ID, $requiredContext{$ID});
- }
- }
-
- #--------------------------------------------------------
- # check for tags which can only appear in the HEAD element
- #--------------------------------------------------------
- if ($ID =~ /^($headTagsRE)$/o && 'HEAD' !~ /^($tagRE)$/)
- {
- &whine($., 'head-element', $ID);
- }
-
- if (! defined $okInHead{$ID} && 'HEAD' =~ /^($tagRE)$/)
- {
- &whine($., 'non-head-element', $ID);
- }
-
- #--------------------------------------------------------
- # check for tags which have been deprecated (now obsolete)
- #--------------------------------------------------------
- &whine($., 'obsolete', $ID) if $ID =~ /^($obsoleteTags)$/o;
- }
-
- #--------------------------------------------------------
- #== was tag of type <TAG> ... </TAG>?
- #== welcome to kludgeville, population seems to be on the increase!
- #--------------------------------------------------------
- if ($ID =~ /^($pairElements)$/o)
- {
- if (!$closing && $ID eq $tags[$#tags] &&
- $ID =~ /^($maybePaired)$/o)
- {
- pop @tags;
- pop @tagNums;
- pop @taglines;
- $tagRE = join('|',@tags);
- }
- if ($closing)
- {
- # trailing whitespace in content of container element
- if ($lastNonTag =~ /\S\s+$/o && $ID =~ /^($cuddleContainers)$/o)
- {
- &whine($., 'container-whitespace', 'trailing', $ID);
- }
-
- #-- if we have a closing tag, and the tag(s) on top of the stack
- #-- are optional closing tag elements, pop tag off the stack,
- #-- unless it matches the current closing tag
- if (@tags > 0 && $tags[$#tags] ne $ID
- && $tags[$#tags] =~ /^($maybePaired)$/o
- && $lastNonTag =~ /^\s*$/o
- && $tagNums[$#tagNums] == ($tagNum - 1)
- && $ID ne 'TD')
- {
- $tline = $taglines[$#taglines];
- &whine($tline, 'empty-container', $tags[$#tags]);
- }
-
- while (@tags > 0 && $tags[$#tags] ne $ID
- && $tags[$#tags] =~ /^($maybePaired)$/o)
- {
- pop @tags;
- pop @tagNums;
- pop @taglines;
- }
- $tagRE = join('|',@tags);
- }
- else
- {
- # leading whitespace in content of container element
- if ($line =~ /^\s+/o && $ID =~ /^($cuddleContainers)$/o)
- {
- &whine($., 'container-whitespace', 'leading', $ID);
- }
- }
-
- if ($closing && $tags[$#tags] eq $ID)
- {
- &PopEndTag();
- }
- elsif ($closing && $tags[$#tags] ne $ID)
- {
- #-- closing tag does not match opening tag on top of stack
- if ($ID =~ /^($tagRE)$/)
- {
- # If we saw </HTML>, </HEAD>, or </BODY>, then we try
- # and resolve anything inbetween on the tag stack
- if ($ID =~ /^(HTML|HEAD|BODY)$/o)
- {
- while ($tags[$#tags] ne $ID)
- {
- $ttag = pop @tags;
- pop @tagNums;
- $ttagline = pop @taglines;
- if ($ttag !~ /^($maybePaired)$/o)
- {
- &whine($., 'unclosed-element', $ttag, $ttagline);
- }
-
- #-- does top of stack match top of orphans stack? --
- while (@orphans > 0 && @tags > 0
- && $orphans[$#orphans] eq $tags[$#tags])
- {
- pop @orphans;
- pop @orphanlines;
- pop @tags;
- pop @tagNums;
- pop @taglines;
- }
- }
-
- #-- pop off the HTML, HEAD, or BODY tag ------------
- pop @tags;
- pop @tagNums;
- pop @taglines;
- $tagRE = join('|',@tags);
- }
- else
- {
- #-- matched opening tag lower down on stack
- push(@orphans, $ID);
- push(@orphanlines, $.);
- }
- }
- else
- {
- if ($ID =~ /^H[1-6]$/o && $tags[$#tags] =~ /^H[1-6]$/o)
- {
- &whine($., 'heading-mismatch', $tags[$#tags], $ID);
- &PopEndTag();
- }
- else
- {
- &whine($., 'mis-match', $ID);
- }
- }
- }
- else
- {
- push(@tags,$ID);
- $tagRE = join('|',@tags);
- push(@tagNums,$tagNum);
- push(@taglines,$.);
- }
- }
-
- #--------------------------------------------------------
- #== inline images (IMG) should have an ALT argument :-)
- #--------------------------------------------------------
- &whine($., 'img-alt') if ($ID eq 'IMG'
- && !defined $args{'ALT'}
- && !$closing);
-
- #--------------------------------------------------------
- #== WIDTH & HEIGHT on inline images (IMG) can help browsers
- #--------------------------------------------------------
- &whine($., 'img-size') if ($ID eq 'IMG'
- && !defined $args{'WIDTH'}
- && !defined $args{'HEIGHT'}
- && !$closing);
-
- } continue {
- $lastTagNum = $tagNum;
- ++$tagNum;
- $lastTAG = $TAG;
- }
- $lastNonTag = $line;
- }
- close PAGE;
-
- if (defined $commentline)
- {
- &whine($commentline, 'unclosed-comment');
- return;
- }
-
- while (@tags > 0)
- {
- $tag = shift(@tags);
- shift(@tagNums);
- $line = shift(@taglines);
- if ($tag !~ /^($maybePaired)$/o)
- {
- &whine($., 'unclosed-element', $tag, $line);
- }
- }
-
- for (@expectedTags)
- {
- # if we haven't seen TITLE but have seen HEAD
- # then we'll have already whined about the lack of a TITLE element
- next if $_ eq 'TITLE' && !$seenTag{$_} && $seenTag{'HEAD'};
- next if $_ eq 'BODY' && $seenTag{'FRAMESET'};
- push(@notSeen,$_) unless $seenTag{$_};
- }
- if (@notSeen > 0)
- {
- printf ("%sexpected tag(s) not seen: @notSeen\n",
- ($opt_s ? "" : "$filename(-): "));
- $exit_status = 1;
- }
- }
-
- #========================================================================
- # Function: CheckAttributes
- # Purpose: If the tag has attributes, check them for validity.
- #========================================================================
- sub CheckAttributes
- {
- undef %args;
-
- if ($closing == 0 && $tag =~ m|^(\S+)\s+(.*)|)
- {
- ($id,$tail) = ($1,$2);
- $ID = "\U$id";
-
- # don't worry, or warn, about attributes of unknown elements
- return unless $ID =~ /^($legalElements)$/io;
-
- $tail =~ s/\n/ /go;
-
- # check for odd number of quote characters
- ($quotes = $tail) =~ s/[^""]//go;
- &whine($., 'odd-quotes', $tag) if length($quotes) % 2 == 1;
-
- $novalue = 0;
- $valid = $validAttributes{$ID};
- while ($tail =~ /^\s*([^=\s]+)\s*=\s*(.*)$/o
- # catch attributes like ISMAP for IMG, with no arg
- || ($tail =~ /^\s*(\S+)(.*)/o && ($novalue = 1)))
- {
- $arg = "\U$1";
- $rest = $2;
-
- &whine($., 'unexpected-open', $tag) if $arg =~ /</o;
-
- if ($arg !~ /^($valid)$/i && $ID =~ /^($legalElements)$/o)
- {
- if ($arg =~ /^($netscapeAttributes{$ID})$/io
- || $arg =~ /^($msAttributes{$ID})$/io)
- {
- &whine($., 'extension-attribute', $arg, $id);
- }
- else
- {
- &whine($., 'unknown-attribute', $id, $arg);
- }
- }
-
- #-- catch repeated attributes. for example:
- #-- <IMG SRC="foo.gif" SRC="bar.gif">
- if (defined $args{$arg})
- {
- &whine($., 'repeated-attribute', $arg, $id);
- }
-
- if ($novalue)
- {
- $args{$arg} = '';
- $tail = $rest;
- }
- elsif ($rest =~ /^'([^'']+)'(.*)$/o)
- {
- &whine($., 'attribute-delimiter', $arg, $ID);
- $args{$arg} = $1;
- $tail = $2;
- }
- elsif ($rest =~ /^"([^""]*)"(.*)$/o
- || $rest =~ /^'([^'']*)'(.*)$/o)
- {
- $args{$arg} = $1;
- $tail = $2;
- }
- elsif ($rest =~ /^(\S+)(.*)$/o)
- {
- $attrValue = $1;
- $args{$arg} = $attrValue;
- $tail = $2;
- if ($attrValue =~ /[^-.A-Za-z0-9]/o)
- {
- &whine($., 'quote-attribute-value', $arg, $attrValue, $ID);
- }
- }
- else
- {
- $args{$arg} = $rest;
- $tail = '';
- }
- $novalue = 0;
- }
- foreach $attr (keys %args)
- {
- if (defined $attributeFormat{$attr} &&
- $args{$attr} !~ /^($attributeFormat{$attr})$/i)
- {
- &whine($., 'attribute-format', $attr, $id, $args{$attr});
- }
- }
- &whine($., 'unexpected-open', $tag) if $tail =~ /</o;
- }
- else
- {
- if ($closing && $id =~ m|^(\S+)\s+(.*)|)
- {
- &whine($., 'closing-attribute', $tag);
- $id = $1;
- }
- $ID = "\U$id";
- }
- }
-
- #========================================================================
- # Function: whine
- # Purpose: Give a standard format whine:
- # filename(line #): <message>
- # The associative array `enabled' is used as a gating
- # function, to suppress or enable each warning. Every
- # warning has an associated identifier, which is used to
- # refer to the warning, and as the index into the hash.
- #========================================================================
- sub whine
- {
- local($line, $id, @argv) = @_;
- local($mstyle) = $variable{'message-style'};
-
-
- return unless $enabled{$id};
- $exit_status = 1;
- (print "$filename:$line:$id\n"), return if $mstyle eq 'terse';
- (eval "print \"\$filename($line): $message{$id}\n\""), return if $mstyle eq 'lint';
- (eval "print \"line $line: $message{$id}\n\""), return if $mstyle eq 'short';
-
- die "Unknown message style `$mstyle'\n";
- }
-
- #========================================================================
- # Function: ReadConfigFile
- # Purpose: Read the specified configuration file. This is used to
- # the user's .weblintrc file, or the global system config
- # file, if the user doesn't have one.
- #========================================================================
- sub ReadConfigFile
- {
- local($filename) = @_;
- local(*CONFIG);
- local($arglist);
- local($keyword, $value);
- local($_);
-
-
- open(CONFIG,"< $filename") || do
- {
- print WARNING "Unable to read config file `$filename': $!\n";
- return;
- };
-
- while (<CONFIG>)
- {
- chop;
- s/#.*$//;
- next if /^\s*$/o;
-
- #-- match keyword: process one or more argument -------------------
- if (/^\s*(enable|disable|extension|ignore)\s+(.*)$/io)
- {
- $keyword = "\U$1";
- $arglist = $2;
- while ($arglist =~ /^\s*(\S+)/o)
- {
- $value = "\L$1";
-
- &enableWarning($1, 1) if $keyword eq 'ENABLE';
-
- &enableWarning($1, 0) if $keyword eq 'DISABLE';
-
- $ignore{"\U$1"} = 1 if $keyword eq 'IGNORE';
-
- &AddExtension("\L$1") if $keyword eq 'EXTENSION';
-
- $arglist = $';
- }
- }
- elsif (/^\s*set\s+(\S+)\s*=\s*(.*)/o)
- {
- # setting a weblint variable
- if (defined $variable{$1})
- {
- $variable{$1} = $2;
- }
- else
- {
- print WARNING "Unknown variable `$1' in configuration file\n";
- }
- }
- elsif (/^\s*use\s*global\s*weblintrc/o)
- {
- if (-f $SITE_RCFILE)
- {
- &ReadConfigFile($SITE_RCFILE);
- }
- else
- {
- print WARNING "$PROGRAM: unable to read global config file\n";
- next;
- }
- }
- else
- {
- print WARNING ("$PROGRAM: ignoring unknown sequence (\"$_\") ".
- "in config file $filename\n");
- }
- }
-
- close CONFIG;
- }
-
- #========================================================================
- # Function: enableWarning
- # Purpose: Takes a warning identifier and an integer (boolean)
- # flag which specifies whether the warning should be
- # enabled.
- #========================================================================
- sub enableWarning
- {
- local($id, $enabled) = @_;
-
-
- if (! defined $enabled{$id})
- {
- print WARNING "$PROGRAM: unknown warning identifier \"$id\"\n";
- return 0;
- }
-
- $enabled{$id} = $enabled;
-
- #
- # ensure consistency: if you just enabled upper-case,
- # then we should make sure that lower-case is disabled
- #
- $enabled{'lower-case'} = 0 if $_ eq 'upper-case';
- $enabled{'upper-case'} = 0 if $_ eq 'lower-case';
- $enabled{'upper-case'} = $enabled{'lower-case'} = 0 if $_ eq 'mixed-case';
-
- return 1;
- }
-
- #========================================================================
- # Function: AddExtension
- # Purpose: Extend the HTML understood. Currently supported extensions:
- # Netscape - the netscape extensions proposed by
- # Netscape Communications, Inc. See:
- # Java - Java elements
- # Microsoft - the extensions for Microsoft Internet
- # Explorer
- # http://www.netscape.com/home/services_docs/html-extensions.html
- #========================================================================
- sub AddExtension
- {
- local($extension) = @_;
- local(@extlist);
- local($element);
-
- if ($extension =~ /,/o)
- {
- @extlist = split(/\s*,\s*/, $extension);
- &AddExtension(shift @extlist) while @extlist > 0;
- return;
- }
-
- if ( $extension ne 'netscape'
- && $extension ne 'java'
- && $extension ne 'microsoft')
- {
- warn "$PROGRAM: unknown extension `$extension' -- ignoring.\n";
- return;
- }
-
- #---------------------------------------------------------------------
- # Java extensions
- #---------------------------------------------------------------------
-
- if ($extension eq 'java')
- {
- $legalElements .= '|'.$javaElements;
- $pairElements .= '|APPLET';
-
- &AddAttributes('APPLET', 'CODEBASE', 'CODE', 'ALT', 'NAME',
- 'WIDTH', 'HEIGHT', 'ALIGN', 'VSPACE', 'HSPACE');
- &AddAttributes('PARAM', 'NAME', 'VALUE');
-
- $requiredContext{'PARAM'} = 'APPLET';
- $requiredAttributes{'APPLET'} = 'CODE|WIDTH|HEIGHT';
- $requiredAttributes{'PARAM'} = 'NAME|VALUE';
-
- return;
- }
-
- #---------------------------------------------------------------------
- # Microsoft extensions
- #---------------------------------------------------------------------
- if ($extension eq 'microsoft')
- {
- #-- new element attributes for existing elements ---------------------
- foreach $element (keys %msAttributes)
- {
- &AddAttributes($element, split(/\|/, $msAttributes{$element}));
- }
-
- $legalElements .= '|'.$msElements;
- $pairElements .= '|CENTER|FONT|MAP|MARQUEE|NOBR';
- $expectArgsRE .= '|FONT';
-
- $attributeFormat{'LOOP'} = '\d+|INFINITE';
- $okInHead{'BGSOUND'} = 1;
- $requiredAttributes{'BGSOUND'} = 'SRC';
- $attributeFormat{'LEFTMARGIN'} = '\d+';
- $attributeFormat{'TOPMARGIN'} = '\d+';
-
- $requiredContext{'AREA'} = 'MAP';
- $requiredAttributes{'MAP'} = 'NAME';
- $requiredAttributes{'AREA'} = 'COORDS';
-
- #-- MARQUEE attributes
- $attributeFormat{'BEHAVIOR'} = 'SCROLL|SLIDE|ALTERNATE';
- $attributeFormat{'DIRECTION'} = 'LEFT|RIGHT';
- $attributeFormat{'WIDTH'} = '\d+%?';
- $attributeFormat{'HEIGHT'} = '\d+%?';
-
- # attribute format check for attributes which take colors
- $attributeFormat{'ALINK'} = $attributeFormat{'VLINK'} =
- $attributeFormat{'LINK'} = $attributeFormat{'TEXT'} =
- $attributeFormat{'BGCOLOR'} =
- $attributeFormat{'COLOR'} = $attributeFormat{'BORDERCOLOR'} =
- $attributeFormat{'BORDERCOLORLIGHT'} =
- $attributeFormat{'BORDERCOLORDARK'} = $colorRE.'|'.$msColors;
-
- }
-
- #---------------------------------------------------------------------
- # Netscape extensions
- #---------------------------------------------------------------------
-
- if ($extension eq 'netscape')
- {
- #-- new element attributes for existing elements ---------------------
- foreach $element (keys %netscapeAttributes)
- {
- &AddAttributes($element, split(/\|/, $netscapeAttributes{$element}));
- }
-
- #-- formats for new attributes ---------------------------------------
-
- $attributeFormat{'SIZE'} = '[-+]?\d+';
- $attributeFormat{'MARGINWIDTH'} = '\d+';
- $attributeFormat{'MARGINHEIGHT'} = '\d+';
- $attributeFormat{'SCROLLING'} = 'NO|YES|AUTO';
- $attributeFormat{'WIDTH'} = '\d+%?';
- $attributeFormat{'TYPE'} .= '|[AaIi1]|disc|square|circle';
-
- #-- new elements -----------------------------------------------------
-
- $legalElements .= '|'.$netscapeElements;
- $pairElements .= '|BLINK|CENTER|FONT|FRAMESET|NOFRAMES|NOBR|MAP|SCRIPT';
- $requiredContext{'AREA'} = 'MAP';
- $requiredContext{'FRAME'} = 'FRAMESET';
- $requiredAttributes{'MAP'} = 'NAME';
- $requiredAttributes{'AREA'} = 'COORDS';
-
- # this should really be specific to ROWS and COLS in FRAMESET element<
- $attributeFormat{'ROWS'} =
- $attributeFormat{'COLS'} = '\d+|(\d*[*%]?,)*\d*[*%]?';
-
- # this is for TEXTAREA
- $attributeFormat{'WRAP'} .= '|SOFT|HARD';
-
- # attribute format check for attributes which take colors
- $attributeFormat{'ALINK'} = $attributeFormat{'VLINK'} =
- $attributeFormat{'LINK'} = $attributeFormat{'TEXT'} =
- $attributeFormat{'BGCOLOR'} = $colorRE;
- $attributeFormat{'ALIGN'} .= '|TEXTTOP|ABSMIDDLE|BASELINE|ABSBOTTOM';
-
- # BASE can take just a TARGET attribute, HREF not required therefore
- delete $requiredAttributes{'BASE'};
-
- $expectArgsRE .= '|FONT|BASE';
-
- $okInHead{'SCRIPT'} = 1;
- }
- }
-
- sub AddAttributes
- {
- local($element,@attributes) = @_;
- local($attr);
-
-
- $attr = join('|', @attributes);
- if (defined $validAttributes{$element})
- {
- $validAttributes{$element} .= "|$attr";
- }
- else
- {
- $validAttributes{$element} = "$attr";
- }
- }
-
- #========================================================================
- # Function: ListWarnings()
- # Purpose: List all supported warnings, with identifier, and
- # whether the warning is enabled.
- #========================================================================
- sub ListWarnings
- {
- local($id);
- local($message);
-
-
- foreach $id (sort keys %enabled)
- {
- ($message = $message{$id}) =~ s/\$argv\[\d+\]/.../g;
- $message =~ s/\\"/"/g;
- print WARNING "$id (", ($enabled{$id} ? "enabled" : "disabled"), ")\n";
- print WARNING " $message\n\n";
- }
- }
-
- sub CheckURL
- {
- local($url) = @_;
- local($workfile) = "$TMPDIR/$PROGRAM.$$";
- local($urlget) = $variable{'url-get'};
-
-
- die "$PRORGAM: url-get variable is not defined -- ".
- "don't know how to get $url\n" unless defined $urlget;
-
- system("$urlget $url > $workfile");
- &WebLint($workfile, $url);
- unlink $workfile;
- }
-
- sub PrintToDo
- {
- print STDERR "$todo";
- exit 0;
- }
-
- #========================================================================
- # Function: wanted
- # Purpose: This is called by &find() to determine whether a file
- # is wanted. We're looking for files, with the filename
- # extension .html or .htm.
- #========================================================================
- sub wanted
- {
- local($foundIndex);
-
- if (-d $_)
- {
- $foundIndex = 0;
- foreach $legalIndex (@dirIndices)
- {
- $foundIndex=1,last if -f "$_/$legalIndex";
- }
- if (! $foundIndex)
- {
- &whine("$arg/$_", 'directory-index', "@dirIndices");
- }
- }
-
- /\.($fileExtensions)$/ && # valid filename extensions: .html .htm
- -f $_ && # only looking for files
- (!$opt_l || !-l $_) && # ignore symlinks if -l given
- &WebLint($_,$name); # check the file
- }
-
- sub PopEndTag
- {
- $matched = pop @tags;
- pop @tagNums;
- $matchedLine = pop @taglines;
-
- #-- does top of stack match top of orphans stack? --------
- while (@orphans > 0 && @tags > 0
- && $orphans[$#orphans] eq $tags[$#tags])
- {
- &whine($., 'element-overlap', $orphans[$#orphans],
- $orphanlines[$#orphanlines], $matched, $matchedLine);
- pop @orphans;
- pop @orphanlines;
- pop @tags;
- pop @tagNums;
- pop @taglines;
- }
- $tagRE = join('|',@tags);
- }
-
- #========================================================================
- # Function: PickTmpdir
- # Purpose: Pick a temporary working directory. If TMPDIR environment
- # variable is set, then we try that first.
- #========================================================================
- sub PickTmpdir
- {
- local(@options) = @_;
- local($tmpdir);
-
- @options = ($ENV{'TMPDIR'}, @options) if defined $ENV{'TMPDIR'};
- foreach $tmpdir (@options)
- {
- return $tmpdir if -d $tmpdir && -w $tmpdir;
- }
- die "$PROGRAM: unable to find a temporary directory.\n",
- ' ' x (length($PROGRAM)+2), "tried: ",join(' ',@options),"\n";
- }
-
- #========================================================================
- # Function: ReadDefaults
- # Purpose: Read the built-in defaults. These are stored at the end
- # of the script, after the __END__, and read from the
- # DATA filehandle.
- #========================================================================
- sub ReadDefaults
- {
- local(@elements);
-
-
- while (<DATA>)
- {
- chop;
- s/^\s*//;
- next if /^$/;
-
- push(@elements, $_);
-
- next unless @elements == 3;
-
- ($id, $default, $message) = @elements;
- $enabled{$id} = ($default eq 'ENABLE');
- ($message{$id} = $message) =~ s/"/\\"/g;
- undef @elements;
- }
- }
-
- __END__
- upper-case
- DISABLE
- tag <$argv[0]> is not in upper case.
- lower-case
- DISABLE
- tag <$argv[0]> is not in lower case.
- mixed-case
- ENABLE
- tag case is ignored
- here-anchor
- ENABLE
- bad form to use `here' as an anchor!
- require-head
- ENABLE
- no <TITLE> in HEAD element.
- once-only
- ENABLE
- tag <$argv[0]> should only appear once. I saw one on line $argv[1]!
- body-no-head
- ENABLE
- <BODY> but no <HEAD>.
- html-outer
- ENABLE
- outer tags should be <HTML> .. </HTML>.
- head-element
- ENABLE
- <$argv[0]> can only appear in the HEAD element.
- non-head-element
- ENABLE
- <$argv[0]> cannot appear in the HEAD element.
- obsolete
- ENABLE
- <$argv[0]> is obsolete.
- mis-match
- ENABLE
- unmatched </$argv[0]> (no matching <$argv[0]> seen).
- img-alt
- ENABLE
- IMG does not have ALT text defined.
- nested-element
- ENABLE
- <$argv[0]> cannot be nested -- </$argv[0]> not yet seen for <$argv[0]> on line $argv[1].
- mailto-link
- DISABLE
- did not see <LINK REV=MADE HREF="mailto..."> in HEAD.
- element-overlap
- ENABLE
- </$argv[0]> on line $argv[1] seems to overlap <$argv[2]>, opened on line $argv[3].
- unclosed-element
- ENABLE
- no closing </$argv[0]> seen for <$argv[0]> on line $argv[1].
- markup-in-comment
- ENABLE
- markup embedded in a comment can confuse some browsers.
- unknown-attribute
- ENABLE
- unknown attribute "$argv[1]" for element <$argv[0]>.
- leading-whitespace
- ENABLE
- should not have whitespace between "<" and "$argv[0]>".
- required-attribute
- ENABLE
- the $argv[0] attribute is required for the <$argv[1]> element.
- unknown-element
- ENABLE
- unknown element <$argv[0]>.
- odd-quotes
- ENABLE
- odd number of quotes in element <$argv[0]>.
- heading-order
- ENABLE
- bad style - heading <$argv[0]> follows <H$argv[1]> on line $argv[2].
- bad-link
- DISABLE
- target for anchor "$argv[0]" not found.
- expected-attribute
- ENABLE
- expected an attribute for <$argv[0]>.
- unexpected-open
- ENABLE
- unexpected < in <$argv[0]> -- potentially unclosed element.
- required-context
- ENABLE
- illegal context for <$argv[0]> - must appear in <$argv[1]> element.
- unclosed-comment
- ENABLE
- unclosed comment (comment should be: <!-- ... -->).
- illegal-closing
- ENABLE
- element <$argv[0]> is not a container -- </$argv[0]> not legal.
- extension-markup
- ENABLE
- <$argv[0]> is extended markup (use "-x <extension>" to allow this).
- extension-attribute
- ENABLE
- attribute `$argv[0]' for <$argv[1]> is extended markup (use "-x <extension>" to allow this).
- physical-font
- DISABLE
- <$argv[0]> is physical font markup -- use logical (such as $argv[1]).
- repeated-attribute
- ENABLE
- attribute $argv[0] is repeated in element <$argv[1]>
- must-follow
- ENABLE
- <$argv[0]> must immediately follow <$argv[1]>
- empty-container
- ENABLE
- empty container element <$argv[0]>.
- directory-index
- ENABLE
- directory does not have an index file ($argv[0])
- closing-attribute
- ENABLE
- closing tag <$argv[0]> should not have any attributes specified.
- attribute-delimiter
- ENABLE
- use of ' for attribute value delimiter is not supported by all browsers (attribute $argv[0] of tag $argv[1])
- img-size
- DISABLE
- setting WIDTH and HEIGHT attributes on IMG tag can improve rendering performance on some browsers.
- container-whitespace
- DISABLE
- $argv[0] whitespace in content of container element $argv[1]
- require-doctype
- DISABLE
- first element was not DOCTYPE specification
- literal-metacharacter
- ENABLE
- metacharacter '$argv[0]' should be represented as '$argv[1]'
- heading-mismatch
- ENABLE
- malformed heading - open tag is <$argv[0]>, but closing is </$argv[1]>
- bad-text-context
- ENABLE
- illegal context, <$argv[0]>, for text; should be in $argv[1].
- attribute-format
- ENABLE
- illegal value for $argv[0] attribute of $argv[1] ($argv[2])
- quote-attribute-value
- ENABLE
- value for attribute $argv[0] ($argv[1]) of element $argv[2] should be quoted (i.e. $argv[0]="$argv[1]")
- meta-in-pre
- ENABLE
- you should use '$argv[0]' in place of '$argv[1]', even in a PRE element.
- heading-in-anchor
- ENABLE
- <A> should be inside <$argv[0]>, not <$argv[0]> inside <A>.
- title-length
- ENABLE
- The HTML spec. recommends the TITLE be no longer than 64 characters.
-